#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 19_10_14_data_preprocessing.Rmd) and clustering (pipeline in 19_10_21_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
  mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
  left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
  mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
  mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`), 
         significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]


rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)


Relation to external clinical traits


Quantifying module-trait associations


Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.

datTraits = datMeta %>% dplyr::select(Diagnosis, Region, Sex, Age, PMI, RNAExtractionBatch) %>%
            dplyr::rename('ExtractionBatch' = RNAExtractionBatch)

# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)

# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)

# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
  print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated')) 
}

rm(ME_object)

I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them

# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)


labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels =  gsub('ME','',rownames(moduleTraitCor)), 
               yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
               textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
               main = paste('Module-Trait relationships'))

diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
                           'MTcor' = moduleTraitCor[,'Diagnosis'],
                           'MTpval' = moduleTraitPvalue[,'Diagnosis'])

genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')

rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)


Studying the modules with the highest absolute correlation to Diagnosis


The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules

top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])

cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #73B000, #EE8045, #00B9E3, #84AD00
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
                   gene_id = paste0(ID, ' (', external_gene_id, ')'))

table(plot_data$ImportantModules)
## 
## #00B9E3 #73B000 #84AD00 #EE8045  Others 
##    1733    1461     891     884   11178
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
         geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() + 
           ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)




Module Membership vs Gene Significance


create_plot = function(module){
  
  plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
  colnames(plot_data)[2] = 'Module'
  
  SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
  
  p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
               scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,8))) + theme_minimal() + xlab('Module Membership') +
               ggtitle(paste0('Module ', module,'  (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
  
  return(p)
}

create_plot(top_modules[1])
create_plot(top_modules[2])
create_plot(top_modules[3])
create_plot(top_modules[4])
rm(create_plot)




SFARI Genes


List of top SFARI Genes in top modules ordered by SFARI score and Gene Significance

table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
             dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
             dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id, 
                    'SFARI score'=gene.score, 'Gene Significance'=GS)

kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[1]))
Top SFARI Genes for Module #73B000
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000189056 RELN -0.1016272 1
ENSG00000169432 SCN9A 0.4140399 2
ENSG00000165186 PTCHD1 0.3027356 2
ENSG00000083168 KAT6A 0.5999494 3
ENSG00000205581 HMGN1 0.5655605 3
ENSG00000158321 AUTS2 0.5297062 3
ENSG00000259207 ITGB3 0.4676222 3
ENSG00000149571 KIRREL3 0.3970895 3
ENSG00000113742 CPEB4 0.3724287 3
ENSG00000196839 ADA 0.3686762 3
ENSG00000132510 KDM6B 0.3312280 3
ENSG00000146938 NLGN4X 0.2547162 3
ENSG00000118432 CNR1 0.2248206 3
ENSG00000168036 CTNNB1 0.1598633 3
ENSG00000166148 AVPR1A 0.1138557 3
ENSG00000044090 CUL7 0.0391425 3
ENSG00000151623 NR3C2 -0.0150334 3
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[2]))
Top SFARI Genes for Module #EE8045
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000110066 SUV420H1 0.6536202 1
ENSG00000141431 ASXL3 0.5017802 1
ENSG00000038382 TRIO 0.6260413 2
ENSG00000141027 NCOR1 0.4643907 2
ENSG00000204764 RANBP17 0.2998930 2
ENSG00000168769 TET2 0.8170356 3
ENSG00000181090 EHMT1 0.7732158 3
ENSG00000162946 DISC1 0.6648966 3
ENSG00000197724 PHF2 0.3656794 3
ENSG00000145020 AMT 0.3410208 3
ENSG00000101004 NINL 0.2768068 3
ENSG00000130940 CASZ1 0.2636101 3
ENSG00000128573 FOXP2 0.2493147 3
ENSG00000070047 PHRF1 0.2458754 3
ENSG00000112902 SEMA5A 0.2184377 3
ENSG00000165995 CACNB2 0.1700655 3
ENSG00000172780 RAB43 0.1656377 3
ENSG00000102974 CTCF 0.1270874 3
kable(table_data %>% filter(Module == top_modules[3] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[3]))
Top SFARI Genes for Module #00B9E3
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000136535 TBR1 -0.6536200 1
ENSG00000136531 SCN2A -0.5374597 1
ENSG00000145362 ANK2 -0.4367876 1
ENSG00000036257 CUL3 -0.1935419 1
ENSG00000174469 CNTNAP2 -0.7002690 2
ENSG00000061676 NCKAP1 -0.6906433 2
ENSG00000119866 BCL11A -0.3844250 2
ENSG00000157445 CACNA2D3 -0.3393135 2
ENSG00000144619 CNTN4 -0.3109404 2
ENSG00000114861 FOXP1 0.2551455 2
ENSG00000166206 GABRB3 -0.1793562 2
ENSG00000074590 NUAK1 -0.8235832 3
ENSG00000144285 SCN1A -0.8000233 3
ENSG00000196876 SCN8A -0.7887782 3
ENSG00000170579 DLGAP1 -0.7789832 3
ENSG00000132294 EFR3A -0.7500147 3
ENSG00000078328 RBFOX1 -0.7311006 3
ENSG00000003147 ICA1 -0.7126337 3
ENSG00000197535 MYO5A -0.7116545 3
ENSG00000175497 DPP10 -0.7059507 3
ENSG00000182621 PLCB1 -0.6917670 3
ENSG00000021645 NRXN3 -0.6715730 3
ENSG00000166501 PRKCB -0.6151170 3
ENSG00000139726 DENR -0.5936537 3
ENSG00000142230 SAE1 -0.5499380 3
ENSG00000183454 GRIN2A -0.5288764 3
ENSG00000185345 PARK2 -0.4980373 3
ENSG00000164506 STXBP5 -0.4854943 3
ENSG00000182771 GRID1 -0.4477343 3
ENSG00000132024 CC2D1A -0.4158439 3
ENSG00000050030 KIAA2022 -0.4001380 3
ENSG00000185008 ROBO2 -0.3078622 3
ENSG00000182256 GABRG3 -0.2878943 3
ENSG00000139174 PRICKLE1 -0.2154883 3
ENSG00000134115 CNTN6 -0.1945703 3
ENSG00000138411 HECW2 -0.1360240 3
ENSG00000149970 CNKSR2 0.0921242 3
ENSG00000140945 CDH13 0.0846523 3
ENSG00000166147 FBN1 -0.0830677 3
ENSG00000171723 GPHN -0.0744071 3
ENSG00000149972 CNTN5 -0.0486109 3
kable(table_data %>% filter(Module == top_modules[4] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[4]))
Top SFARI Genes for Module #84AD00
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000139613 SMARCC2 -0.4329730 2
ENSG00000196557 CACNA1H -0.4127420 2
ENSG00000171759 PAH -0.6896875 3
ENSG00000169918 OTUD7A -0.4706156 3
ENSG00000106290 TAF6 -0.4144602 3
ENSG00000133026 MYH10 -0.3262873 3
ENSG00000101489 CELF4 -0.3169738 3
ENSG00000180914 OXTR -0.1668994 3

Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case (even the opposite may be true)

plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>% 
            group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>% 
            mutate(p=round(p/n*100,2)) 

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(this_row$hasSFARIscore==FALSE & this_row$p==100){
    new_row = this_row
    new_row$hasSFARIscore = TRUE
    new_row$p = 0
    plot_data = plot_data %>% rbind(new_row)
  }
}

plot_data = plot_data %>% filter(hasSFARIscore==TRUE)

ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
         xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
         theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)

Breaking the SFARI genes by score

scores = c(1,2,3,4,5,6,'None')

plot_data = dataset %>% group_by(Module, MTcor, gene.score) %>% summarise(n=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(N=n()), by='Module') %>% 
            mutate(p=round(n/N*100,2), gene.score = as.character(gene.score))

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(sum(plot_data$Module == this_row$Module)<7){
    missing_scores = which(! scores %in% plot_data$gene.score[plot_data$Module == this_row$Module])
    for(s in missing_scores){
      new_row = this_row
      new_row$gene.score = as.character(s)
      new_row$n = 0
      new_row$p = 0
      plot_data = plot_data %>% rbind(new_row) 
    }
  }
}

plot_data = plot_data %>% filter(gene.score != 'None')

plot_function = function(i){
  i = 2*i-1
  plot_list = list()
  for(j in 1:2){
    plot_list[[j]] = ggplotly(plot_data %>% filter(gene.score==scores[i+j-1]) %>% ggplot(aes(MTcor, p, size=n)) + 
                geom_smooth(color='gray', se=FALSE) +
                geom_point(color=plot_data$Module[plot_data$gene.score==scores[i+j-1]], alpha=0.5, aes(id=Module)) +
                geom_hline(yintercept=mean(plot_data$p[plot_data$gene.score==scores[i+j-1]]), color='gray') +
                xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
                theme_minimal() + theme(legend.position = 'none'))
  }
  p = subplot(plot_list, nrows=1) %>% layout(annotations = list(
    list(x = 0.2 , y = 1.05, text = paste0('SFARI score ', scores[i]), showarrow = F, xref='paper', yref='paper'),
    list(x = 0.8 , y = 1.05, text = paste0('SFARI score ', scores[i+1]), showarrow = F, xref='paper', yref='paper')))
  
  return(p)
}

plot_function(1)
plot_function(2)
plot_function(3)
rm(i, s, this_row, new_row, plot_function)




Module Eigengenes


Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.

In both cases, the Eigengenes separate the behaviour between autism and control samples very clearly!

plot_EGs = function(module){

  plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)

  p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
                    ggtitle(paste0('Module ', module, '  (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
  return(p)
}

p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
p3 = plot_EGs(top_modules[3])
p4 = plot_EGs(top_modules[4])

grid.arrange(p1, p2, p3, p4, nrow=2)

rm(plot_EGs, p1, p2, p3, p4)




Identifying important genes


Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance

*Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules and not a single one belonging to scores 1 and 2

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]], caption=paste0('Top 10 genes for module ', top_modules[1], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
Top 10 genes for module #73B000 (MTcor = 0.94)
ID external_gene_id MM GS gene.score importance
ENSG00000143384 MCL1 0.8500241 0.9078060 None 0.8789151
ENSG00000158615 PPP1R15B 0.8407717 0.8212537 None 0.8310127
ENSG00000196935 SRGAP1 0.7175838 0.9224538 None 0.8200188
ENSG00000161638 ITGA5 0.7550781 0.8597979 None 0.8074380
ENSG00000148841 ITPRIP 0.7913310 0.8161802 None 0.8037556
ENSG00000120278 PLEKHG1 0.7857882 0.8086459 None 0.7972171
ENSG00000133639 BTG1 0.7819379 0.8082183 None 0.7950781
ENSG00000101493 ZNF516 0.6925114 0.8811710 None 0.7868412
ENSG00000150457 LATS2 0.7097060 0.8499030 None 0.7798045
ENSG00000097007 ABL1 0.7612757 0.7941289 None 0.7777023
ENSG00000253731 PCDHGA6 0.7450788 0.8036535 None 0.7743662
ENSG00000106366 SERPINE1 0.7535342 0.7911667 4 0.7723505
ENSG00000198795 ZNF521 0.7862020 0.7397450 None 0.7629735
ENSG00000154640 BTG3 0.6921295 0.8306872 None 0.7614084
ENSG00000138166 DUSP5 0.7541208 0.7643020 None 0.7592114
ENSG00000072364 AFF4 0.7413482 0.7716602 6 0.7565042
ENSG00000204569 PPP1R10 0.7470986 0.7645752 None 0.7558369
ENSG00000135913 USP37 0.6855044 0.8254166 None 0.7554605
ENSG00000197329 PELI1 0.7612944 0.7481606 None 0.7547275
ENSG00000130222 GADD45G 0.6503873 0.8500145 None 0.7502009
kable(top_genes[[2]], caption=paste0('Top 10 genes for module ', top_modules[2], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
Top 10 genes for module #EE8045 (MTcor = 0.93)
ID external_gene_id MM GS gene.score importance
ENSG00000003402 CFLAR 0.8349806 0.8167723 None 0.8258764
ENSG00000138119 MYOF 0.8095979 0.8334501 None 0.8215240
ENSG00000162745 OLFML2B 0.7794224 0.8526613 None 0.8160419
ENSG00000089159 PXN 0.7551603 0.8687338 None 0.8119471
ENSG00000124782 RREB1 0.7602334 0.8444107 None 0.8023221
ENSG00000198185 ZNF334 0.8308122 0.7574061 None 0.7941092
ENSG00000168769 TET2 0.7660493 0.8170356 3 0.7915424
ENSG00000120690 ELF1 0.7858080 0.7870686 None 0.7864383
ENSG00000163629 PTPN13 0.7149171 0.8558172 None 0.7853671
ENSG00000183808 RBM12B 0.6787244 0.8906825 None 0.7847035
ENSG00000104870 FCGRT 0.6702519 0.8567333 None 0.7634926
ENSG00000171988 JMJD1C 0.7726856 0.7508001 4 0.7617428
ENSG00000117000 RLF 0.8221159 0.7009312 None 0.7615236
ENSG00000173110 HSPA6 0.7142783 0.8087644 None 0.7615214
ENSG00000180596 HIST1H2BC 0.7506019 0.7717162 None 0.7611591
ENSG00000102531 FNDC3A 0.7265045 0.7897408 None 0.7581227
ENSG00000110719 TCIRG1 0.6831133 0.8306871 None 0.7569002
ENSG00000130066 SAT1 0.7490607 0.7561901 None 0.7526254
ENSG00000065978 YBX1 0.6670100 0.8339125 None 0.7504612
ENSG00000073792 IGF2BP2 0.6578041 0.8387673 None 0.7482857
kable(top_genes[[3]], caption=paste0('Top 10 genes for module ', top_modules[3], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[3]),1],2),')'))
Top 10 genes for module #00B9E3 (MTcor = -0.93)
ID external_gene_id MM GS gene.score importance
ENSG00000050748 MAPK9 0.9018732 -0.9400140 None 0.9209436
ENSG00000108395 TRIM37 0.9037848 -0.9297931 None 0.9167889
ENSG00000138078 PREPL 0.9001507 -0.9038232 None 0.9019869
ENSG00000177432 NAP1L5 0.8683153 -0.9048326 None 0.8865739
ENSG00000155097 ATP6V1C1 0.8612597 -0.8966476 None 0.8789537
ENSG00000114573 ATP6V1A 0.8322001 -0.9172599 None 0.8747300
ENSG00000163577 EIF5A2 0.8346941 -0.9108048 None 0.8727495
ENSG00000128881 TTBK2 0.8697142 -0.8751661 None 0.8724401
ENSG00000171132 PRKCE 0.8338660 -0.9088992 None 0.8713826
ENSG00000111674 ENO2 0.8876355 -0.8482909 None 0.8679632
ENSG00000176490 DIRAS1 0.8331173 -0.8967040 None 0.8649106
ENSG00000131437 KIF3A 0.8801280 -0.8462276 None 0.8631778
ENSG00000196876 SCN8A 0.9339299 -0.7887782 3 0.8613540
ENSG00000125814 NAPB 0.8865175 -0.8311321 None 0.8588248
ENSG00000172348 RCAN2 0.7982100 -0.9149243 None 0.8565672
ENSG00000184368 MAP7D2 0.8249437 -0.8801442 None 0.8525440
ENSG00000162694 EXTL2 0.8007142 -0.8982366 None 0.8494754
ENSG00000130540 SULT4A1 0.8815880 -0.8113335 None 0.8464608
ENSG00000144285 SCN1A 0.8893987 -0.8000233 3 0.8447110
ENSG00000132639 SNAP25 0.8756003 -0.8102863 4 0.8429433
kable(top_genes[[4]], caption=paste0('Top 10 genes for module ', top_modules[4], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[4]),1],2),')'))
Top 10 genes for module #84AD00 (MTcor = -0.93)
ID external_gene_id MM GS gene.score importance
ENSG00000106683 LIMK1 0.8336101 -0.8623672 None 0.8479886
ENSG00000141576 RNF157 0.8343007 -0.8485893 None 0.8414450
ENSG00000127838 PNKD 0.8393553 -0.8333891 None 0.8363722
ENSG00000140854 KATNB1 0.8646680 -0.7907368 None 0.8277024
ENSG00000148334 PTGES2 0.8037964 -0.8316634 None 0.8177299
ENSG00000006432 MAP3K9 0.7465523 -0.8872405 None 0.8168964
ENSG00000130725 UBE2M 0.7814384 -0.8501315 None 0.8157850
ENSG00000105251 SHD 0.7661369 -0.8630235 None 0.8145802
ENSG00000139190 VAMP1 0.8258018 -0.8015636 None 0.8136827
ENSG00000168993 CPLX1 0.7956338 -0.8267869 None 0.8112104
ENSG00000183837 PNMA3 0.7999070 -0.8021580 None 0.8010325
ENSG00000180155 LYNX1 0.7934936 -0.8012133 None 0.7973535
ENSG00000124191 TOX2 0.7739545 -0.8098081 None 0.7918813
ENSG00000139637 C12orf10 0.7420227 -0.8338129 None 0.7879178
ENSG00000196972 LINC00087 0.8285660 -0.7454300 None 0.7869980
ENSG00000135631 RAB11FIP5 0.7285209 -0.8340525 4 0.7812867
ENSG00000174238 PITPNA 0.6804206 -0.8774836 None 0.7789521
ENSG00000106789 CORO2A 0.7260306 -0.8209005 None 0.7734656
ENSG00000196177 ACADSB 0.7340860 -0.8051786 None 0.7696323
ENSG00000127445 PIN1 0.7966748 -0.7363993 None 0.7665370
rm(create_table)
pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & 
                                  ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              theme_minimal() + ggtitle('Important genes identified through WGCNA')

Level of expression by Diagnosis for top genes, ordered by importance (defined above)

create_plot = function(i){
  
  plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>% 
              melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
              left_join(top_genes[[i]], by='ID') %>%
              left_join(datMeta %>% dplyr::select(Dissected_Sample_ID, Diagnosis),
                        by = c('variable'='Dissected_Sample_ID')) %>% arrange(desc(importance))
  
  p = ggplotly(plot_data %>% mutate(external_gene_id=factor(external_gene_id, 
                                    levels=unique(plot_data$external_gene_id), ordered=T)) %>%
               ggplot(aes(external_gene_id, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
                      xlab(paste0('Top genes for module ', top_modules[i], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[i]][1],2), ')')) + ylab('Level of Expression') +
                      theme(axis.text.x = element_text(angle = 90, hjust = 1)))
  return(p)
  
}

create_plot(1)
create_plot(2)
create_plot(3)
create_plot(4)
rm(create_plot)




Enrichment Analysis


Using the package anRichment

# Prepare dataset

# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID,
                        module = ifelse(genes_info$Module %in% top_modules, genes_info$Module, 'other')) %>%
             filter(genes_info$Module!='gray')

# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
## Cache found
EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')

for(tm in top_modules){
  cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
             tm, ' don\'t have an Entrez Gene ID')) 
}
## 
## 27 genes from top module #73B000 don't have an Entrez Gene ID
## 29 genes from top module #EE8045 don't have an Entrez Gene ID
## 27 genes from top module #00B9E3 don't have an Entrez Gene ID
## 25 genes from top module #84AD00 don't have an Entrez Gene ID
rm(getinfo, mart, biomart_output, tm)
# Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf

collectGarbage()

# EA_dataset = rbind(EA_dataset[EA_dataset$module!='other',], EA_dataset[EA_dataset$module=='other',][sample(sum(EA_dataset$module=='other'), 1000),])

# Prepare datasets
GO_col = buildGOcollection(organism = 'human', verbose = 0)
## Loading required package: org.Hs.eg.db
## 
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
internal_col = internalCollection(organism = 'human')
MillerAIBS_col = MillerAIBSCollection(organism = 'human')
BrainDisease_col = BrainDiseaseCollection(organism = 'human')
combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)

# Print collections used
cat('Using collections: ')
## Using collections:
knownGroups(combined_col, sortBy = 'size')
##  [1] "GO"                                         
##  [2] "GO.BP"                                      
##  [3] "GO.MF"                                      
##  [4] "GO.CC"                                      
##  [5] "JA Miller at AIBS"                          
##  [6] "Chip-X enrichment analysis (ChEA)"          
##  [7] "Brain"                                      
##  [8] "JAM"                                        
##  [9] "Prenatal brain"                             
## [10] "Brain region markers"                       
## [11] "Cortex"                                     
## [12] "Brain region marker enriched gene sets"     
## [13] "WGCNA"                                      
## [14] "BrainRegionMarkers"                         
## [15] "BrainRegionMarkers.HBA"                     
## [16] "BrainRegionMarkers.HBA.localMarker(top200)" 
## [17] "Postnatal brain"                            
## [18] "ImmunePathways"                             
## [19] "Markers of cortex layers"                   
## [20] "BrainLists"                                 
## [21] "Cell type markers"                          
## [22] "Germinal brain"                             
## [23] "BrainRegionMarkers.HBA.globalMarker(top200)"
## [24] "Accelerated evolution"                      
## [25] "Postmitotic brain"                          
## [26] "BrainLists.Blalock_AD"                      
## [27] "BrainLists.DiseaseGenes"                    
## [28] "BloodAtlases"                               
## [29] "Verge Disease Genes"                        
## [30] "BloodAtlases.Whitney"                       
## [31] "BrainLists.JAXdiseaseGene"                  
## [32] "BrainLists.MO"                              
## [33] "Age-associated genes"                       
## [34] "BrainLists.Lu_Aging"                        
## [35] "Cell type marker enriched gene sets"        
## [36] "BrainLists.CA1vsCA3"                        
## [37] "BrainLists.MitochondrialType"               
## [38] "BrainLists.MO.2+_26Mar08"                   
## [39] "BrainLists.MO.Sugino"                       
## [40] "BloodAtlases.Gnatenko2"                     
## [41] "BloodAtlases.Kabanova"                      
## [42] "BrainLists.Voineagu"                        
## [43] "StemCellLists"                              
## [44] "StemCellLists.Lee"
# Perform Enrichment Analysis
enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
                                refCollection = combined_col, #useBackground = 'given', 
                                threshold = 1e-4, thresholdType = 'Bonferroni',
                                getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
##  enrichmentAnalysis: preparing data..
##   ..working on label set 1 ..


Results


kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
Enriched terms for module #73B000 (MTcor = 0.9445)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAM:003078 Temporal Lobe_IN_Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.0059431 0.0000369 2.589589 1425 158 37
JAMiller.AIBS.000169 Cerebellum-enriched genes in midfetal human brain JA Miller at AIBS|Brain|Prenatal brain|Brain region markers 0.0148439 0.0000858 2.764561 1425 124 31
GO:0072359 circulatory system development GO|GO.BP 0.0156674 0.0000893 1.556171 1425 938 132
GO:0072358 cardiovascular system development GO|GO.BP 0.0255755 0.0001371 1.688960 1425 622 95
GO:0001944 vasculature development GO|GO.BP 0.0468149 0.0002284 1.677678 1425 613 93
GO:0001568 blood vessel development GO|GO.BP 0.1391834 0.0005990 1.663463 1425 585 88
JAMiller.AIBS.000502 Genes bound by SUZ12 in mouse MESC from PMID 16625203 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.1891141 0.0007751 1.551789 1425 791 111
JAMiller.AIBS.000138 VZ/SZ/IZ enriched in E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain 0.1944664 0.0007905 1.898554 1425 332 57
JAMiller.AIBS.000208 RegionalWGCNA midfetal M38 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.2263597 0.0008912 1.507943 1425 902 123
JAM:002886 Hypothalamus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.2992571 0.0011335 2.431246 1425 141 31
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, enrichmentRatio),
      caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
Enriched terms for module #EE8045 (MTcor = 0.9309)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000176 CortexWGCNA midfetal M6 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.0001115 1.20e-06 1.933365 850 863 90
GO:0010556 regulation of macromolecule biosynthetic process GO|GO.BP 0.0006070 5.30e-06 1.375427 850 3464 257
GO:1903506 regulation of nucleic acid-templated transcription GO|GO.BP 0.0006810 5.90e-06 1.411367 850 3008 229
GO:0010468 regulation of gene expression GO|GO.BP 0.0008402 6.90e-06 1.344785 850 3860 280
GO:2001141 regulation of RNA biosynthetic process GO|GO.BP 0.0008644 7.00e-06 1.407623 850 3016 229
GO:0009889 regulation of biosynthetic process GO|GO.BP 0.0019537 1.41e-05 1.346451 850 3690 268
GO:0051252 regulation of RNA metabolic process GO|GO.BP 0.0020681 1.48e-05 1.377692 850 3243 241
GO:0097659 nucleic acid-templated transcription GO|GO.BP 0.0026497 1.79e-05 1.381739 850 3153 235
GO:0006355 regulation of transcription, DNA-templated GO|GO.BP 0.0027739 1.86e-05 1.397619 850 2958 223
GO:0031326 regulation of cellular biosynthetic process GO|GO.BP 0.0028170 1.88e-05 1.346881 850 3620 263
kable(enrichment$enrichmentTable %>% filter(class==top_modules[3]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[3], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[3]][1],4), ')'))
Enriched terms for module #00B9E3 (MTcor = -0.9261)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000052 CortexWGCNA 15-21 post-conception weeks C26 JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.00e+00 0e+00 2.707335 1701 722 211
JAMiller.AIBS.000142 Highest in CP of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 2.191886 1701 1213 287
JAMiller.AIBS.000569 WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.00e+00 0e+00 1.523000 1701 4045 665
JAM:002769 downAD_mitochondrion JAM|BrainLists|BrainLists.Blalock_AD 0.00e+00 0e+00 3.356002 1701 265 96
JAMiller.AIBS.000150 Highest in CP of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 1.874676 1701 1270 257
JAM:003016 downAD_synapticTransmission JAM|BrainLists|BrainLists.Blalock_AD 0.00e+00 0e+00 5.158343 1701 88 49
JAMiller.AIBS.000155 Lowest in VZ of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 1.721720 1701 1668 310
JAMiller.AIBS.000123 HippocampusWGCNA turquoise DGenriched upAge JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.00e+00 0e+00 1.916682 1701 1102 228
JAMiller.AIBS.000570 WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.00e+00 0e+00 2.474226 1701 483 129
GO:0045202 synapse GO|GO.CC 0.00e+00 0e+00 1.839475 1701 1113 221
GO:0044456 synapse part GO|GO.CC 0.00e+00 0e+00 1.942109 1701 892 187
JAM:003072 Tail of Caudate Nucleus_IN_Striatum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.509949 1701 161 61
GO:0097458 neuron part GO|GO.CC 0.00e+00 0e+00 1.643978 1701 1606 285
GO:0097060 synaptic membrane GO|GO.CC 0.00e+00 0e+00 2.389246 1701 411 106
JAMiller.AIBS.000141 CP enriched in E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 2.107674 1701 567 129
JAM:002764 downAging_mitochondria_synapse JAM|BrainLists|BrainLists.Lu_Aging 0.00e+00 0e+00 2.351621 1701 390 99
JAM:002751 Basal Pons JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.180999 1701 166 57
GO:0099536 synaptic signaling GO|GO.BP 0.00e+00 0e+00 1.981063 1701 650 139
GO:0007268 chemical synaptic transmission GO|GO.BP 0.00e+00 0e+00 1.966407 1701 636 135
GO:0098916 anterograde trans-synaptic signaling GO|GO.BP 0.00e+00 0e+00 1.966407 1701 636 135
GO:0099537 trans-synaptic signaling GO|GO.BP 0.00e+00 0e+00 1.956365 1701 644 136
JAM:002744 Autism_differential_expression_across_at_least_one_comparison JAM|BrainLists|BrainLists.Voineagu 0.00e+00 0e+00 1.855218 1701 764 153
JAM:003054 subiculum_IN_Hippocampal Formation JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.031842 1701 165 54
GO:0034702 ion channel complex GO|GO.CC 0.00e+00 0e+00 2.514012 1701 269 73
GO:0043005 neuron projection GO|GO.CC 0.00e+00 0e+00 1.633021 1701 1214 214
JAMiller.AIBS.000005 CPi markers at 21 post-conception weeks JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Postmitotic brain 0.00e+00 0e+00 2.383013 1701 311 80
GO:1902495 transmembrane transporter complex GO|GO.CC 0.00e+00 0e+00 2.444657 1701 288 76
GO:0030424 axon GO|GO.CC 0.00e+00 0e+00 1.968995 1701 574 122
GO:0045211 postsynaptic membrane GO|GO.CC 0.00e+00 0e+00 2.376146 1701 308 79
JAM:002805 Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2.992087 1701 161 52
JAM:002739 arcuate nucleus of medulla_IN_Myelencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2.940060 1701 167 53
GO:1990351 transporter complex GO|GO.CC 0.00e+00 0e+00 2.378585 1701 296 76
GO:0005216 ion channel activity GO|GO.MF 0.00e+00 0e+00 2.226422 1701 362 87
JAM:002920 Lateral Nucleus_IN_Amygdala JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2.919552 1701 165 52
GO:0098793 presynapse GO|GO.CC 1.00e-07 0e+00 2.049727 1701 461 102
GO:0022838 substrate-specific channel activity GO|GO.MF 1.00e-07 0e+00 2.178283 1701 370 87
GO:0034220 ion transmembrane transport GO|GO.BP 2.00e-07 0e+00 1.648519 1701 1034 184
GO:0022890 inorganic cation transmembrane transporter activity GO|GO.MF 2.00e-07 0e+00 1.964537 1701 514 109
GO:0015318 inorganic molecular entity transmembrane transporter activity GO|GO.MF 3.00e-07 0e+00 1.802788 1701 704 137
JAMiller.AIBS.000095 Cortical PNOC neurons JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 3.00e-07 0e+00 1.280862 1701 3949 546
GO:0022803 passive transmembrane transporter activity GO|GO.MF 3.00e-07 0e+00 2.124981 1701 388 89
JAM:002882 Hippocampal Formation JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 4.00e-07 0e+00 2.812274 1701 168 51
GO:0006812 cation transport GO|GO.BP 5.00e-07 0e+00 1.648359 1701 1006 179
GO:0022839 ion gated channel activity GO|GO.MF 5.00e-07 0e+00 2.308086 1701 293 73
GO:0015267 channel activity GO|GO.MF 7.00e-07 0e+00 2.106534 1701 387 88
GO:0022836 gated channel activity GO|GO.MF 8.00e-07 0e+00 2.269977 1701 302 74
JAMiller.AIBS.000463 Genes bound by SMAD4 in HUMAN A2780 from PMID 21799915 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.00e-06 0e+00 1.414957 1701 2082 318
GO:0008324 cation transmembrane transporter activity GO|GO.MF 1.70e-06 0e+00 1.882784 1701 556 113
GO:0034703 cation channel complex GO|GO.CC 2.30e-06 0e+00 2.555576 1701 203 56
JAMiller.AIBS.000042 CortexWGCNA 15-21 post-conception weeks C16 SPenriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 2.60e-06 0e+00 2.573323 1701 198 55
GO:0098660 inorganic ion transmembrane transport GO|GO.BP 2.70e-06 0e+00 1.750569 1701 725 137
GO:0005261 cation channel activity GO|GO.MF 4.60e-06 1e-07 2.266714 1701 282 69
GO:0015672 monovalent inorganic cation transport GO|GO.BP 4.90e-06 1e-07 1.967567 1701 452 96
JAM:002824 Dentate Nucleus_IN_Cerebellar Nucleus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 6.60e-06 1e-07 2.711404 1701 164 48
GO:0098655 cation transmembrane transport GO|GO.BP 6.60e-06 1e-07 1.721512 1701 748 139
GO:0098794 postsynapse GO|GO.CC 8.20e-06 1e-07 1.821389 1701 590 116
JAM:002918 lateral medullary reticular group_IN_Myelencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.15e-05 2e-07 2.704387 1701 161 47
GO:0015075 ion transmembrane transporter activity GO|GO.MF 1.73e-05 2e-07 1.698800 1701 758 139
GO:0015077 monovalent inorganic cation transmembrane transporter activity GO|GO.MF 1.82e-05 2e-07 2.135617 1701 321 74
GO:0099240 intrinsic component of synaptic membrane GO|GO.CC 2.77e-05 4e-07 2.707002 1701 154 45
GO:0098662 inorganic cation transmembrane transport GO|GO.BP 2.78e-05 4e-07 1.748449 1701 657 124
GO:0006811 ion transport GO|GO.BP 4.13e-05 5e-07 1.468754 1701 1457 231
JAMiller.AIBS.000503 Genes bound by SUZ12 in mouse MESC from PMID 18555785 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 4.86e-05 6e-07 1.677725 1701 762 138
GO:0120025 plasma membrane bounded cell projection GO|GO.CC 6.43e-05 7e-07 1.393951 1701 1914 288
JAM:002991 downAD_synapticTransmission JAM|BrainLists|BrainLists.Blalock_AD 9.44e-05 1e-06 3.151657 1701 97 33
kable(enrichment$enrichmentTable %>% filter(class==top_modules[4]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[4], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[4]][1],4), ')'))
Enriched terms for module #84AD00 (MTcor = -0.9309)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000124 HippocampusWGCNA yellow JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.0000038 0.0000001 2.175347 856 677 80
GO:0005759 mitochondrial matrix GO|GO.CC 0.0452945 0.0002231 2.105051 856 446 51
JAMiller.AIBS.000048 CortexWGCNA 15-21 post-conception weeks C22 CPenriched enrichedForAutismGenes JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.1443667 0.0006143 1.877221 856 608 62
JAMiller.AIBS.000206 RegionalWGCNA midfetal M36 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.2920425 0.0011147 2.492032 856 229 31
GO:0098798 mitochondrial protein complex GO|GO.CC 0.3202003 0.0012038 2.401158 856 253 33
GO:0044429 mitochondrial part GO|GO.CC 0.5590261 0.0019303 1.625425 856 974 86
JAM:002985 Parietal Lobe_IN_Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.0000000 0.0076641 3.012362 856 110 18
GO:0034470 ncRNA processing GO|GO.BP 1.0000000 0.0067087 2.028097 856 354 39
JAMiller.AIBS.000044 CortexWGCNA 15-21 post-conception weeks C18 JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 1.0000000 0.0102615 1.664224 856 719 65
GO:0005739 mitochondrion GO|GO.CC 1.0000000 0.0047036 1.464200 856 1471 117

Save Enrichment Analysis results

save(enrichment, file='./../Data/enrichmentAnalysis.RData')
#load('./../Data/enrichmentAnalysis.RData')



Session info

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.10.0                      
##  [2] BrainDiseaseCollection_1.00              
##  [3] anRichment_1.01-2                        
##  [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
##  [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2  
##  [6] GenomicFeatures_1.38.2                   
##  [7] GenomicRanges_1.38.0                     
##  [8] GenomeInfoDb_1.22.0                      
##  [9] anRichmentMethods_0.90-1                 
## [10] WGCNA_1.68                               
## [11] fastcluster_1.1.25                       
## [12] dynamicTreeCut_1.63-1                    
## [13] GO.db_3.10.0                             
## [14] AnnotationDbi_1.48.0                     
## [15] IRanges_2.20.2                           
## [16] S4Vectors_0.24.3                         
## [17] Biobase_2.46.0                           
## [18] BiocGenerics_0.32.0                      
## [19] biomaRt_2.42.0                           
## [20] knitr_1.24                               
## [21] doParallel_1.0.15                        
## [22] iterators_1.0.12                         
## [23] foreach_1.4.7                            
## [24] polycor_0.7-10                           
## [25] expss_0.10.1                             
## [26] GGally_1.4.0                             
## [27] gridExtra_2.3                            
## [28] viridis_0.5.1                            
## [29] viridisLite_0.3.0                        
## [30] RColorBrewer_1.1-2                       
## [31] dendextend_1.13.3                        
## [32] plotly_4.9.2                             
## [33] glue_1.3.1                               
## [34] reshape2_1.4.3                           
## [35] forcats_0.4.0                            
## [36] stringr_1.4.0                            
## [37] dplyr_0.8.3                              
## [38] purrr_0.3.3                              
## [39] readr_1.3.1                              
## [40] tidyr_1.0.2                              
## [41] tibble_2.1.3                             
## [42] ggplot2_3.2.1                            
## [43] tidyverse_1.3.0                          
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.2-0                 BiocFileCache_1.10.2       
##   [5] plyr_1.8.5                  lazyeval_0.2.2             
##   [7] splines_3.6.0               crosstalk_1.0.0            
##   [9] BiocParallel_1.20.1         robust_0.4-18.2            
##  [11] digest_0.6.24               htmltools_0.4.0            
##  [13] fansi_0.4.1                 magrittr_1.5               
##  [15] checkmate_1.9.4             memoise_1.1.0              
##  [17] fit.models_0.5-14           cluster_2.0.8              
##  [19] annotate_1.64.0             Biostrings_2.54.0          
##  [21] modelr_0.1.5                matrixStats_0.55.0         
##  [23] askpass_1.1                 prettyunits_1.0.2          
##  [25] colorspace_1.4-1            blob_1.2.1                 
##  [27] rvest_0.3.5                 rappdirs_0.3.1             
##  [29] rrcov_1.4-7                 haven_2.2.0                
##  [31] xfun_0.8                    crayon_1.3.4               
##  [33] RCurl_1.95-4.12             jsonlite_1.6               
##  [35] genefilter_1.68.0           impute_1.60.0              
##  [37] survival_2.44-1.1           gtable_0.3.0               
##  [39] zlibbioc_1.32.0             XVector_0.26.0             
##  [41] DelayedArray_0.12.2         DEoptimR_1.0-8             
##  [43] scales_1.1.0                mvtnorm_1.0-11             
##  [45] DBI_1.1.0                   Rcpp_1.0.3                 
##  [47] xtable_1.8-4                progress_1.2.2             
##  [49] htmlTable_1.13.1            foreign_0.8-71             
##  [51] bit_1.1-15.2                preprocessCore_1.48.0      
##  [53] Formula_1.2-3               htmlwidgets_1.5.1          
##  [55] httr_1.4.1                  ellipsis_0.3.0             
##  [57] acepack_1.4.1               farver_2.0.3               
##  [59] pkgconfig_2.0.3             reshape_0.8.8              
##  [61] XML_3.99-0.3                nnet_7.3-12                
##  [63] dbplyr_1.4.2                locfit_1.5-9.1             
##  [65] later_1.0.0                 labeling_0.3               
##  [67] tidyselect_0.2.5            rlang_0.4.4                
##  [69] munsell_0.5.0               cellranger_1.1.0           
##  [71] tools_3.6.0                 cli_2.0.1                  
##  [73] generics_0.0.2              RSQLite_2.2.0              
##  [75] broom_0.5.4                 fastmap_1.0.1              
##  [77] evaluate_0.14               yaml_2.2.0                 
##  [79] bit64_0.9-7                 fs_1.3.1                   
##  [81] robustbase_0.93-5           nlme_3.1-139               
##  [83] mime_0.9                    xml2_1.2.2                 
##  [85] compiler_3.6.0              rstudioapi_0.10            
##  [87] curl_4.3                    reprex_0.3.0               
##  [89] geneplotter_1.64.0          pcaPP_1.9-73               
##  [91] stringi_1.4.6               highr_0.8                  
##  [93] lattice_0.20-38             Matrix_1.2-17              
##  [95] vctrs_0.2.2                 pillar_1.4.3               
##  [97] lifecycle_0.1.0             data.table_1.12.8          
##  [99] bitops_1.0-6                httpuv_1.5.2               
## [101] rtracklayer_1.46.0          R6_2.4.1                   
## [103] latticeExtra_0.6-28         promises_1.1.0             
## [105] codetools_0.2-16            MASS_7.3-51.4              
## [107] assertthat_0.2.1            SummarizedExperiment_1.16.1
## [109] DESeq2_1.26.0               openssl_1.4.1              
## [111] withr_2.1.2                 GenomicAlignments_1.22.1   
## [113] Rsamtools_2.2.2             GenomeInfoDbData_1.2.2     
## [115] hms_0.5.3                   grid_3.6.0                 
## [117] rpart_4.1-15                rmarkdown_1.14             
## [119] Cairo_1.5-10                shiny_1.4.0                
## [121] lubridate_1.7.4             base64enc_0.1-3